In [1]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-

In [2]:
# Libraries to use
from __future__ import division 
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import mysql.connector


c:\python27\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

In [3]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *

In [4]:
%matplotlib inline
%load_ext line_profiler

In [25]:
# Global analysis parameters.
riskFreeRate = 0.02   # annual risk-free rate used in the Sharpe-ratio calculations below
varThreshold =0.05    # VaR threshold -- NOTE(review): not referenced in the visible cells, confirm before removing
scaleParameter = 50   # periods per year (~trading weeks) used to annualise weekly returns / std devs

In [26]:
# Load DB connection settings from conf.json and open the source database.
with open('conf.json', 'r') as fd:
    conf = json.load(fd)
src_db = mysql.connector.connect(**conf['src_db'])

In [27]:
# Table names
index_data_table = 'fund_weekly_index'  # index time-series data (weekly net worth)
index_name_table = 'index_id_name_mapping'  # index id -> display name
type_index_table = 'index_stype_code_mapping' # fund-category -> benchmark index mapping

In [28]:
# Category -> benchmark-index mapping for private funds (only needs to run once).
def get_type_index_table(tableName=type_index_table):
    """Fetch the whole category/index mapping table from the source DB.

    Parameters
    ----------
    tableName : str
        Name of the mapping table (default: module-level ``type_index_table``).

    Returns
    -------
    pandas.DataFrame
        All rows of the table, NaN rows dropped, indexed by ``stype_code``.
    """
    cursor = src_db.cursor()
    try:
        # A table name cannot be bound as a query parameter; interpolating the
        # trusted module-level constant here is acceptable.
        cursor.execute("select * from %s" % tableName)
        result = cursor.fetchall()
        colnames = [col[0] for col in cursor.description]
    finally:
        cursor.close()  # always release the cursor, even if the query fails
    pdResult = pd.DataFrame(result)
    pdResult = pdResult.dropna(axis=0)
    pdResult.columns = colnames
    pdResult.set_index('stype_code', inplace=True)
    return pdResult

In [29]:
# Private-fund index id -> name mapping (only needs to run once).
def get_index_table(tableName=index_name_table):
    """Fetch the index-id / index-name mapping table from the source DB.

    Parameters
    ----------
    tableName : str
        Name of the mapping table (default: module-level ``index_name_table``).

    Returns
    -------
    pandas.DataFrame
        All rows of the table, NaN rows dropped, indexed by ``index_id``.
    """
    cursor = src_db.cursor()
    try:
        # Table name comes from a trusted module-level constant.
        cursor.execute("select * from %s" % tableName)
        result = cursor.fetchall()
        colnames = [col[0] for col in cursor.description]
    finally:
        cursor.close()  # always release the cursor, even if the query fails
    pdResult = pd.DataFrame(result)
    pdResult = pdResult.dropna(axis=0)
    pdResult.columns = colnames
    pdResult.set_index('index_id', inplace=True)
    return pdResult

In [30]:
# Net-worth time series of one private-fund index.
def get_index(index, tableName=index_data_table):
    """Load the weekly net-worth series for a single index id.

    Parameters
    ----------
    index : str
        Index id (e.g. 'FI01'); bound as a real query parameter, never
        string-interpolated, to avoid SQL injection.
    tableName : str
        Source table (default: module-level ``index_data_table``).

    Returns
    -------
    pandas.DataFrame
        Columns ['index', 'net_worth'] (floats), de-duplicated, indexed by
        date, NaN rows dropped.
    """
    cursor = src_db.cursor()
    try:
        # Only the table name (a trusted constant) is interpolated; the
        # index id is passed as a parameter (%%s escapes to %s).
        sql = ("select index_id,statistic_date,index_value from %s "
               "where index_id = %%s" % tableName)
        cursor.execute(sql, (index,))
        result = cursor.fetchall()
    finally:
        cursor.close()  # always release the cursor, even if the query fails
    pdResult = pd.DataFrame(result, dtype=float)
    pdResult.columns = ['index', 'date', 'net_worth']
    pdResult = pdResult.drop_duplicates().set_index('date')
    # dropna removes every row containing NaN, so the old forward-fill that
    # followed it was dead code and has been removed.
    pdResult = pdResult.dropna(axis=0)
    return pdResult

In [31]:
indexIDdf = get_index_table()
indexIDdf


Out[31]:
index_name
index_id
FI01 私募全市场指数
FI02 阳光私募指数
FI03 私募FOF指数
FI04 股票多头策略私募指数
FI05 股票多空策略私募指数
FI06 市场中性策略私募指数
FI07 债券基金私募指数
FI08 管理期货策略私募指数
FI09 宏观策略私募指数
FI10 事件驱动策略私募指数
FI11 相对价值策略私募指数
FI12 多策略私募指数
FI13 组合投资策略私募指数

allfundindex = get_index(indexIDdf.index[0], tableName=index_data_table)
allfundindex['year'] = allfundindex.index.map(lambda x: x.year)
allfundindex['month'] = allfundindex.index.map(lambda x: (x.year, x.month))

groupgeneratorbymonth = allfundindex.groupby('month')
groupgeneratorbyyear = allfundindex.groupby('year')
annual_pnl_FI01 = (groupgeneratorbyyear['net_worth'].last() - groupgeneratorbyyear['net_worth'].first()) / groupgeneratorbyyear['net_worth'].first()
monthly_pnl_FI01 = (groupgeneratorbymonth['net_worth'].last() - groupgeneratorbymonth['net_worth'].first()) / groupgeneratorbymonth['net_worth'].first()

Convert to DataFrame -- yearly

annual_pnl_FI01 = pd.DataFrame(annual_pnl_FI01)
annual_pnl_FI01.columns = [indexIDdf.index[0]]
annual_pnl_FI01 = annual_pnl_FI01.T.round(5)

Convert to DataFrame -- monthly

monthly_pnl_FI01 = pd.DataFrame(monthly_pnl_FI01).round(5)
monthly_pnl_FI01.columns = [indexIDdf.index[0]]
monthly_pnl_FI01 = monthly_pnl_FI01.T


In [32]:
annual_pnl_FI01


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-7ca3e6008899> in <module>()
----> 1 annual_pnl_FI01

NameError: name 'annual_pnl_FI01' is not defined

In [ ]:
monthly_pnl_FI01

In [33]:
# Classify a date by calendar quarter.
def byseasons(x):
    """Return the 'YYYY_Q' quarter label for a date-like object.

    ``x`` must expose ``.year`` and ``.month`` (datetime, Timestamp, ...).
    Months 1-3 map to quarter 1, 4-6 to 2, 7-9 to 3, 10-12 to 4.
    """
    quarter = (x.month - 1) // 3 + 1
    return '%d_%d' % (x.year, quarter)

In [34]:
allfundindex['season'] = allfundindex.index.map(byseasons)
allfundindex['pnl'] = allfundindex['net_worth'].pct_change()

In [35]:
# Maximum drawdown, plus the peak and trough dates of the drawdown window.
def cal_max_dd_indicator(networthSeries):
    """Compute the maximum drawdown of a net-worth series.

    Parameters
    ----------
    networthSeries : pandas.Series
        Net worth indexed by date, in chronological order.

    Returns
    -------
    tuple (max_dd, start_date, end_date)
        Largest peak-to-trough decline as a fraction, the date of the peak
        preceding it, and the date on which the trough occurs.
    """
    # Vectorized: drawdown at each date is 1 - value / running maximum.
    # Replaces an O(n^2) per-date loop, drops the deprecated `is_copy`
    # assignment, and fixes `.idxmax` being passed as a bound method into
    # `.loc[...]` instead of being called.
    running_max = networthSeries.cummax()
    drawdown = 1.0 - networthSeries / running_max
    end_date = drawdown.idxmax()
    # The peak of the worst drawdown is the highest net worth up to the trough.
    start_date = networthSeries.loc[:end_date].idxmax()
    return drawdown.max(), start_date, end_date

In [36]:
# Per-quarter maximum drawdown of the all-fund index (FI01), rounded to 5 dp.
seasonList = sorted(list(set(allfundindex['season'].values)))
#maxdd =pd.DataFrame(index = seasonList,columns = [indexIDdf.index[0]],dtype = float)
maxdd_dict = {}
for season in seasonList:
    # restrict the series to one quarter, then take only the drawdown value ([0])
    temp = allfundindex[allfundindex['season'] == season]
    maxdd_dict[season] = np.round(cal_max_dd_indicator(temp['net_worth'])[0],5)

In [37]:
# Assemble the per-quarter drawdowns into a one-column DataFrame named
# after the first index id (FI01); last expression displays it.
maxdd_df = pd.DataFrame([maxdd_dict]).T
maxdd_df.columns =[indexIDdf.index[0]]
maxdd_df.index.name = 'season'
maxdd_df


Out[37]:
FI01
season
2007_1 0.03349
2007_2 0.01549
2007_3 0.01665
2007_4 0.06330
2008_1 0.13768
2008_2 0.14073
2008_3 0.12829
2008_4 0.04867
2009_1 0.03815
2009_2 0.01448
2009_3 0.08397
2009_4 0.03589
2010_1 0.05217
2010_2 0.08238
2010_3 0.01218
2010_4 0.03529
2011_1 0.05330
2011_2 0.08455
2011_3 0.08319
2011_4 0.07532
2012_1 0.02173
2012_2 0.02440
2012_3 0.04430
2012_4 0.05528
2013_1 0.02533
2013_2 0.04550
2013_3 0.00668
2013_4 0.04370
2014_1 0.02371
2014_2 0.02199
2014_3 0.00520
2014_4 0.01611
2015_1 0.01477
2015_2 0.10056
2015_3 0.07149
2015_4 0.01511
2016_1 0.08489
2016_2 0.01996
2016_3 0.01167
2016_4 0.02425
2017_1 0.01239
2017_2 0.03261

In [38]:
# Maximum drawdown per period (default: per quarter/"season").
def cal_maxdd_by_season(df, period_col='season'):
    """Maximum drawdown of ``df['net_worth']`` within each period bucket.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'net_worth', 'index' and the ``period_col`` column.
    period_col : str, optional
        Column used to bucket rows (default 'season'). Added as a
        backward-compatible parameter so the same grouping logic can be
        reused for other period columns (e.g. 'year').

    Returns
    -------
    pandas.DataFrame
        One column named after the fund's index id, indexed by period,
        drawdowns rounded to 4 decimals.
    """
    maxdd_dict = {}
    for period in sorted(set(df[period_col].values)):
        sub = df[df[period_col] == period]
        # keep only the drawdown magnitude ([0]); peak/trough dates unused here
        maxdd_dict[period] = np.round(cal_max_dd_indicator(sub['net_worth'])[0], 4)
    maxdd_df = pd.DataFrame([maxdd_dict]).T
    maxdd_df.columns = [df['index'].iloc[0]]
    maxdd_df.index.name = period_col
    return maxdd_df

In [39]:
# Maximum drawdown per calendar year.
def cal_maxdd_by_year(df):
    """Maximum drawdown of ``df['net_worth']`` within each year.

    ``df`` must contain 'net_worth', 'index' and 'year' columns. Returns a
    one-column DataFrame (column = the fund's index id, index name 'year')
    with drawdowns rounded to 4 decimals.
    """
    yearly_dd = {}
    for year in sorted(set(df['year'].values)):
        one_year = df[df['year'] == year]
        yearly_dd[year] = np.round(cal_max_dd_indicator(one_year['net_worth'])[0], 4)
    result = pd.DataFrame([yearly_dd]).T
    result.columns = [df['index'].iloc[0]]
    result.index.name = 'year'
    return result

In [40]:
# --- Quarterly metric accumulators ---------------------------------------
maxddbyseason = pd.DataFrame()  # quarterly max drawdown
retbyseason = pd.DataFrame()    # quarterly annualised return
stdbyseason = pd.DataFrame()    # quarterly annualised std dev
sharpebyseason = pd.DataFrame()  # quarterly Sharpe ratio

# --- Annual metric accumulators ------------------------------------------
maxddbyyear= pd.DataFrame()   # annual max drawdown
retbyyear = pd.DataFrame()    # annual annualised return
stdbyyear = pd.DataFrame()    # annual annualised std dev
sharpebyyear = pd.DataFrame()  # annual Sharpe ratio


# One pass per index id: load its net-worth series, bucket by quarter and
# by year, and outer-merge each metric into the wide accumulator tables
# (outer merge keeps periods some indexes are missing, filling with NaN).
for index in indexIDdf.index:
    # quarterly metrics
    indexdf =  get_index(index,tableName =index_data_table)
    indexdf['pnl'] = indexdf['net_worth'].pct_change() 
    indexdf['season'] = indexdf.index.map(byseasons)
    indexdf['year'] = indexdf.index.map(lambda x : x.year)
    maxdd_season = cal_maxdd_by_season(indexdf)
    maxddbyseason = maxddbyseason.merge(maxdd_season,how='outer',left_index=True,right_index =True)
    
    indexbyseason = indexdf.groupby('season')['pnl']
    # annualise from weekly returns: scaleParameter ~ weeks per year
    ret_season = (indexbyseason .mean()+1)**scaleParameter - 1   # annualised return (per quarter)    
    std_season = np.sqrt(scaleParameter) * indexbyseason.std()   # annualised std dev (per quarter)
    sharpe_season = (ret_season - riskFreeRate) / std_season     #  Sharpe ratio (per quarter)
    ret_season = pd.DataFrame(ret_season).round(4)                        # Series -> DataFrame
    ret_season.columns =[indexdf['index'].iloc[0]]               # name the column after the index id
    std_season = pd.DataFrame(std_season).round(4)                        
    std_season.columns =[indexdf['index'].iloc[0]]
    sharpe_season = pd.DataFrame(sharpe_season).round(4)
    sharpe_season.columns = [indexdf['index'].iloc[0]]
    
    retbyseason  = retbyseason.merge(ret_season,how='outer',left_index=True,right_index =True)
    stdbyseason = stdbyseason .merge(std_season,how='outer',left_index=True,right_index =True)
    sharpebyseason = sharpebyseason.merge(sharpe_season,how='outer',left_index=True,right_index =True)
    
    # annual metrics 
    maxdd_year = cal_maxdd_by_year(indexdf)
    maxddbyyear = maxddbyyear.merge(maxdd_year ,how='outer',left_index=True,right_index =True)
    
    indexbyyear = indexdf.groupby('year')['pnl']
    ret_year = (indexbyyear .mean()+1)**scaleParameter - 1   # annualised return (per year)    
    std_year = np.sqrt(scaleParameter) * indexbyyear.std()   # annualised std dev (per year)
    sharpe_year = (ret_year - riskFreeRate) / std_year    #  Sharpe ratio (per year)
    ret_year = pd.DataFrame(ret_year).round(4)                        # Series -> DataFrame
    ret_year.columns =[indexdf['index'].iloc[0]]               # name the column after the index id
    std_year = pd.DataFrame(std_year).round(4)                        
    std_year.columns =[indexdf['index'].iloc[0]]
    sharpe_year = pd.DataFrame(sharpe_year).round(4)
    sharpe_year.columns = [indexdf['index'].iloc[0]]
    
    retbyyear = retbyyear.merge(ret_year,how='outer',left_index=True,right_index =True)
    stdbyyear = stdbyyear .merge(std_year,how='outer',left_index=True,right_index =True)
    sharpebyyear = sharpebyyear.merge(sharpe_year,how='outer',left_index=True,right_index =True)

In [41]:
indexIDdf.index[3:]


Out[41]:
Index([u'FI04', u'FI05', u'FI06', u'FI07', u'FI08', u'FI09', u'FI10', u'FI11',
       u'FI12', u'FI13'],
      dtype='object', name=u'index_id')

In [42]:
retbyseason.tail()


Out[42]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
season
2016_2 0.0539 0.0114 0.0173 0.0238 0.0279 0.0171 0.0297 0.2570 0.1027 0.1113 0.0509 0.0521 0.0129
2016_3 0.1306 0.1651 0.0766 0.1619 0.1131 0.0603 0.0896 0.1162 0.1621 0.2374 0.1257 0.1227 0.0747
2016_4 0.1175 0.0245 0.0385 0.0362 0.0236 0.0552 0.1045 0.6488 0.1016 -0.0006 0.0333 0.0778 0.0099
2017_1 0.0911 0.1348 0.0856 0.1389 0.0809 0.0260 0.0508 -0.0214 0.0069 0.1214 0.0669 0.0746 0.0858
2017_2 -0.1481 -0.1537 -0.0881 -0.1825 -0.0594 -0.1439 -0.0337 -0.1557 -0.0852 -0.3029 -0.0602 -0.1247 -0.1078

In [43]:
retbyyear.tail()


Out[43]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
year
2013 0.1010 0.1107 NaN 0.1071 0.2313 0.0796 0.0291 NaN NaN 0.1931 NaN NaN 0.1294
2014 0.2186 0.2318 NaN 0.2631 0.1402 0.0522 0.1079 0.6473 0.7113 0.3824 0.2114 0.2395 0.2131
2015 0.2892 0.2802 0.1695 0.3446 0.3931 0.2455 0.0953 0.4349 0.5697 0.6690 0.2953 0.2953 0.2157
2016 0.0292 -0.0492 -0.0022 -0.0410 -0.0139 0.0210 0.0731 0.3124 0.0681 0.0216 0.0606 0.0266 -0.0299
2017 -0.0201 -0.0009 0.0064 -0.0137 0.0175 -0.0516 0.0132 -0.0822 -0.0342 -0.0875 0.0097 -0.0170 -0.0029

In [44]:
stdbyseason.tail()


Out[44]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
season
2016_2 0.0495 0.0759 0.0256 0.0804 0.0508 0.0240 0.0084 0.0730 0.0324 0.1043 0.0248 0.0499 0.0294
2016_3 0.0495 0.0627 0.0276 0.0685 0.0413 0.0224 0.0057 0.0746 0.0579 0.0797 0.0283 0.0456 0.0318
2016_4 0.0593 0.1294 0.0311 0.0671 0.0468 0.0348 0.0538 0.1443 0.0500 0.0635 0.0267 0.0476 0.0332
2017_1 0.0358 0.0475 0.0206 0.0509 0.0409 0.0275 0.0093 0.0407 0.0429 0.0608 0.0195 0.0315 0.0268
2017_2 0.0456 0.0626 0.0211 0.0643 0.0620 0.0294 0.0086 0.0368 0.0468 0.0796 0.0241 0.0358 0.0264

In [45]:
stdbyyear.tail()


Out[45]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
year
2013 0.0928 0.1147 NaN 0.1232 0.0852 0.0272 0.0181 NaN NaN 0.1392 NaN NaN 0.0868
2014 0.0681 0.0978 NaN 0.1090 0.0603 0.0607 0.0167 0.1074 0.1308 0.1141 0.0482 0.0589 0.0809
2015 0.1628 0.2186 0.0857 0.2373 0.1090 0.0619 0.0136 0.0864 0.1301 0.2132 0.0585 0.1286 0.1264
2016 0.0803 0.1206 0.0397 0.1175 0.0666 0.0310 0.0279 0.0960 0.1034 0.1384 0.0314 0.0664 0.0518
2017 0.0432 0.0573 0.0239 0.0605 0.0509 0.0306 0.0107 0.0396 0.0441 0.0759 0.0230 0.0358 0.0296

In [46]:
sharpebyseason.tail()


Out[46]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
season
2016_2 0.6860 -0.1135 -0.1060 0.0470 0.1556 -0.1202 1.1566 3.2465 2.5528 0.8756 1.2462 0.6433 -0.2431
2016_3 2.2347 2.3157 2.0496 2.0720 2.2546 1.7971 12.2487 1.2893 2.4547 2.7278 3.7374 2.2541 1.7170
2016_4 1.6445 0.0351 0.5952 0.2412 0.0769 1.0126 1.5702 4.3584 1.6321 -0.3244 0.5000 1.2156 -0.3053
2017_1 1.9843 2.4164 3.1869 2.3362 1.4885 0.2188 3.2981 -1.0165 -0.3048 1.6674 2.4036 1.7319 2.4572
2017_2 -3.6833 -2.7729 -5.1296 -3.1513 -1.2793 -5.5681 -6.2699 -4.7800 -2.2469 -4.0563 -3.3353 -4.0368 -4.8447

In [47]:
sharpebyyear.tail()


Out[47]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
year
2013 0.8730 0.7903 NaN 0.7070 2.4813 2.1920 0.5037 NaN NaN 1.2437 NaN NaN 1.2601
2014 2.9170 2.1660 NaN 2.2313 1.9932 0.5304 5.2705 5.8411 5.2859 3.1752 3.9750 3.7258 2.3857
2015 1.6536 1.1900 1.7444 1.3677 3.4226 3.6403 5.5553 4.8044 4.2257 3.0444 4.7014 2.1405 1.5481
2016 0.1140 -0.5735 -0.5583 -0.5190 -0.5092 0.0338 1.9003 3.0446 0.4651 0.0118 1.2913 0.0989 -0.9639
2017 -0.9276 -0.3642 -0.5686 -0.5573 -0.0485 -2.3394 -0.6370 -2.5803 -1.2277 -1.4160 -0.4500 -1.0322 -0.7760

In [48]:
maxddbyseason.tail()


Out[48]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
season
2016_2 0.0200 0.0350 0.0136 0.0372 0.0233 0.0118 0.0029 0.0069 0.0052 0.0394 0.0082 0.0237 0.0146
2016_3 0.0117 0.0152 0.0062 0.0175 0.0092 0.0048 0.0000 0.0173 0.0125 0.0198 0.0031 0.0103 0.0082
2016_4 0.0242 0.0430 0.0154 0.0340 0.0221 0.0151 0.0049 0.0170 0.0201 0.0362 0.0104 0.0184 0.0188
2017_1 0.0124 0.0148 0.0057 0.0170 0.0163 0.0119 0.0010 0.0166 0.0142 0.0270 0.0056 0.0130 0.0081
2017_2 0.0326 0.0386 0.0189 0.0426 0.0245 0.0287 0.0074 0.0320 0.0246 0.0675 0.0169 0.0271 0.0220

In [49]:
maxddbyyear.tail()


Out[49]:
FI01 FI02 FI03 FI04 FI05 FI06 FI07 FI08 FI09 FI10 FI11 FI12 FI13
year
2013 0.0609 0.0792 NaN 0.0842 0.0334 0.0124 0.0233 NaN NaN 0.1153 NaN NaN 0.0558
2014 0.0263 0.0568 NaN 0.0646 0.0343 0.0677 0.0051 0.0178 0.0374 0.0514 0.0122 0.0141 0.0539
2015 0.1843 0.2633 0.1210 0.2788 0.0791 0.0174 0.0046 0.0263 0.0477 0.1880 0.0198 0.1346 0.1643
2016 0.0849 0.1240 0.0483 0.1327 0.0693 0.0245 0.0049 0.0173 0.0620 0.1336 0.0204 0.0648 0.0675
2017 0.0326 0.0386 0.0207 0.0426 0.0245 0.0314 0.0074 0.0471 0.0246 0.0702 0.0169 0.0271 0.0226

In [50]:
# NOTE(review): scratch cell -- the loop body `1` is a no-op; after the loop,
# `row` holds the LAST row of maxddbyyear, and the print shows its column
# labels. Later cells depend on this leaked `row`, which breaks under
# Restart & Run All.
for row in maxddbyyear.iterrows():
    1
print row[1].index.tolist()


[u'FI01', u'FI02', u'FI03', u'FI04', u'FI05', u'FI06', u'FI07', u'FI08', u'FI09', u'FI10', u'FI11', u'FI12', u'FI13']

In [51]:
[row[0]]+ row[1].index.tolist()


Out[51]:
[2017,
 u'FI01',
 u'FI02',
 u'FI03',
 u'FI04',
 u'FI05',
 u'FI06',
 u'FI07',
 u'FI08',
 u'FI09',
 u'FI10',
 u'FI11',
 u'FI12',
 u'FI13']

In [52]:
row[1].values.tolist()


Out[52]:
[0.0326,
 0.0386,
 0.0207,
 0.0426,
 0.0245,
 0.0314,
 0.0074,
 0.0471,
 0.0246,
 0.0702,
 0.0169,
 0.0271,
 0.0226]

In [53]:
field_names = ('field1', 'field2', 'field3')
','.join(field_names)


Out[53]:
'field1,field2,field3'

In [54]:
##  The database driver cannot serialise numpy scalar types (e.g.
##  numpy.float64) -- convert them to native Python values first!!
class NumpyMySQLConverter(mysql.connector.conversion.MySQLConverter):
    """ A mysql.connector Converter that handles Numpy types """

    def _float32_to_mysql(self, value):
        # numpy.float32 -> Python float
        return float(value)

    def _float64_to_mysql(self, value):
        # numpy.float64 -> Python float
        return float(value)

    def _int32_to_mysql(self, value):
        # numpy.int32 -> Python int
        return int(value)

    def _int64_to_mysql(self, value):
        # numpy.int64 -> Python int
        return int(value)

# Open the results database and register the converter so numpy scalars
# can be written back without type errors.
res_db = mysql.connector.connect(**conf['res_db'])
res_db.set_converter_class(NumpyMySQLConverter)
cur = res_db.cursor()

In [55]:
# Insert one list as a row into index_maxdd_by_year; the target table must
# already exist and the list length must match its column count.
def insert_one_row(datalist):
    """Insert ``datalist`` (year followed by the FI01..FI13 values) as one row.

    ``datalist`` must be a plain list whose length equals the number of
    columns in ``index_maxdd_by_year`` (14: year + 13 index columns).
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    res_db.set_converter_class(NumpyMySQLConverter)  # numpy -> Python scalars
    cur = res_db.cursor()
    try:
        cur.execute('INSERT index_maxdd_by_year SET '
                    'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
                    'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
                    'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
                    datalist)
        res_db.commit()
    finally:
        # close cursor and connection even if execute/commit raises,
        # so connections are never leaked
        cur.close()
        res_db.close()

In [56]:
#map(lambda x: x+'=%s',['s','d'])

In [57]:
#insert_one_row([2021]+list(np.random.rand(13)))

In [58]:
def upsert_indexdf(df):
    """Insert every row of ``df`` into ``index_maxdd_by_year``.

    ``df`` is indexed by year with columns FI01..FI13. NaN cells are sent
    as SQL NULL. Commits once after all rows are written.
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    res_db.set_converter_class(NumpyMySQLConverter)  # numpy -> Python scalars
    cur = res_db.cursor()
    try:
        for row in df.iterrows():
            # Explicitly map NaN -> None so MySQL stores NULL; assigning None
            # into a float Series (the old approach) silently coerces back
            # to NaN and the driver rejects it.
            values = [None if pd.isnull(v) else v for v in row[1].values.tolist()]
            cur.execute('INSERT index_maxdd_by_year SET '
                        'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
                        'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
                        'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
                        [row[0]] + values)
        res_db.commit()
    finally:
        # always release DB resources, even on error
        cur.close()
        res_db.close()

In [59]:
# NOTE(review): scratch cell -- relies on `row` leaking from a loop in an
# earlier cell and on the module-level `cur`; breaks under Restart & Run All.
cur.execute('INSERT index_maxdd_by_year SET '
            'year = %s, FI01=%s, FI02=%s, FI03=%s, FI04=%s,'
            'FI05=%s, FI06=%s, FI07=%s, FI08=%s, FI09=%s,'
            'FI10=%s, FI11=%s, FI12=%s, FI13=%s',
              [row[0]]+ row[1].values.tolist())
#res_db.commit()  # commit flushes/executes everything issued before it!!

In [60]:
np.random.rand(13)


Out[60]:
array([ 0.8340094 ,  0.36577316,  0.52454435,  0.48703197,  0.5441111 ,
        0.55946241,  0.05801314,  0.93808221,  0.94809165,  0.30055551,
        0.64884269,  0.55725807,  0.95052718])

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [61]:
# NOTE(review): this redefinition shadows the earlier upsert_indexdf (which
# targets index_maxdd_by_year); only this one is live after the cell runs.
def upsert_indexdf(df):
    """Insert/update each row of ``df`` in ``fund_indexdf``.

    ``df`` is indexed by fund_id; the 31 value columns follow the order in
    the SQL below. NaNs are sent as SQL NULL. ON DUPLICATE KEY UPDATE makes
    re-running idempotent. The value list is ``[fund_id] + values * 2``
    because the UPDATE clause repeats every column except fund_id.
    """
    res_db = mysql.connector.connect(**conf['res_db'])
    res_db.set_converter_class(NumpyMySQLConverter)  # numpy -> Python scalars (was missing here)
    cur = res_db.cursor()
    try:
        for row in df.iterrows():
            # Map NaN -> None so MySQL stores NULL.
            values = [None if pd.isnull(v) else v for v in row[1].values.tolist()]
            cur.execute(
                'INSERT fund_indexdf SET '
                'fund_id = %s, fund_name=%s, fund_manager_nominal=%s, foundation_date=%s, end_date=%s,'
                'fund_status=%s, type_name=%s, fund_manager=%s, data_end_date=%s, cum_net=%s,'
                'return_annualized=%s, max_dd=%s, max_dd_start_date=%s, max_dd_end_date=%s,'
                'std_annualized=%s, downsideRisk_annualized=%s, odds=%s, sharpe_ratio=%s, '
                'calmar_ratio=%s,sortino_ratio=%s, alpha=%s, beta_2=%s, Beta=%s, Value=%s, '
                'EarningYield=%s, Growth=%s, Leverage=%s, Liquidity=%s, Momentum=%s, '
                # Trailing space added: the original concatenated to
                # "...Volatility=%sON DUPLICATE KEY UPDATE" -- a SQL syntax error.
                'NonLinearSize=%s, Size=%s, Volatility=%s '
                'ON DUPLICATE KEY UPDATE '
                'fund_name=%s, fund_manager_nominal=%s, foundation_date=%s, end_date=%s,'
                'fund_status=%s, type_name=%s, fund_manager=%s, data_end_date=%s, cum_net=%s,'
                'return_annualized=%s, max_dd=%s, max_dd_start_date=%s, max_dd_end_date=%s,'
                'std_annualized=%s, downsideRisk_annualized=%s, odds=%s, sharpe_ratio=%s, '
                'calmar_ratio=%s,sortino_ratio=%s, alpha=%s, beta_2=%s, Beta=%s, Value=%s, '
                'EarningYield=%s, Growth=%s, Leverage=%s, Liquidity=%s, Momentum=%s, '
                'NonLinearSize=%s, Size=%s, Volatility=%s',
                [row[0]] + values * 2
            )
        res_db.commit()
    finally:
        # always release DB resources, even on error
        cur.close()
        res_db.close()

Insert the whole DataFrame into the MySQL database

from sqlalchemy import create_engine
import sqlalchemy.engine.url as url

MSText(length=255)

# NOTE(review): plaintext credentials and host were hardcoded here -- load them
# from conf.json instead. Also, encoding='utf-8' was mistakenly passed to
# str.format() (where it is silently ignored) rather than to create_engine().
db_engine = create_engine('mysql://{0}:{1}@{2}:{3}/{4}'.format(conf['res_db']['user'], conf['res_db']['password'], conf['res_db']['host'], conf['res_db']['port'], 'PrivateEquityFund_W'), encoding='utf-8')

Quarterly metrics

maxddbyseason.reset_index().to_sql(name='index_maxdd_by_season', con=db_engine, if_exists='replace', index=False)
retbyseason.reset_index().to_sql(name='index_return_by_season', con=db_engine, if_exists='replace', index=False)
stdbyseason.reset_index().to_sql(name='index_std_by_season', con=db_engine, if_exists='replace', index=False)
sharpebyseason.reset_index().to_sql(name='index_sharpe_by_season', con=db_engine, if_exists='replace', index=False)

Annual metrics

maxddbyyear.reset_index().to_sql(name='index_maxdd_by_year', con=db_engine, if_exists='replace', index=False)
retbyyear.reset_index().to_sql(name='index_return_by_year', con=db_engine, if_exists='replace', index=False)
stdbyyear.reset_index().to_sql(name='index_std_by_year', con=db_engine, if_exists='replace', index=False)
sharpebyyear.reset_index().to_sql(name='index_sharpe_by_year', con=db_engine, if_exists='replace', index=False)


In [62]:
gene = allfundindex.groupby('season')['pnl']

In [63]:
(0.017056+1)**50 -1


Out[63]:
1.3293959224126208

In [ ]:


In [64]:
((gene .mean()+1)**scaleParameter-1)


Out[64]:
season
2007_1    1.329423
2007_2    1.802057
2007_3    1.350133
2007_4    0.347197
2008_1   -0.276712
2008_2   -0.380620
2008_3   -0.282053
2008_4    0.107480
2009_1    0.675435
2009_2    0.449512
2009_3    0.073090
2009_4    0.512312
2010_1    0.061560
2010_2   -0.152796
2010_3    0.252125
2010_4    0.332824
2011_1    0.025179
2011_2   -0.249501
2011_3   -0.144502
2011_4   -0.181610
2012_1    0.123108
2012_2   -0.001949
2012_3   -0.150151
2012_4    0.164156
2013_1    0.283439
2013_2   -0.012452
2013_3    0.190026
2013_4   -0.020567
2014_1    0.069291
2014_2    0.098561
2014_3    0.408178
2014_4    0.332207
2015_1    0.650382
2015_2    0.626736
2015_3   -0.297659
2015_4    0.457796
2016_1   -0.157984
2016_2    0.053932
2016_3    0.130575
2016_4    0.117480
2017_1    0.091089
2017_2   -0.148138
Name: pnl, dtype: float64

In [65]:
((gene .mean()+1)**scaleParameter-1)/ (np.sqrt(scaleParameter) * gene.std())


Out[65]:
season
2007_1     5.938120
2007_2    13.596550
2007_3     6.202194
2007_4     2.101493
2008_1    -1.337822
2008_2    -1.370750
2008_3    -2.048127
2008_4     0.696472
2009_1     4.821509
2009_2     4.665123
2009_3     0.492920
2009_4     2.929563
2010_1     0.590307
2010_2    -1.183197
2010_3     2.096857
2010_4     2.470616
2011_1     0.258059
2011_2    -2.178449
2011_3    -1.487684
2011_4    -1.396105
2012_1     1.389152
2012_2    -0.018253
2012_3    -1.374202
2012_4     1.589285
2013_1     2.428695
2013_2    -0.118950
2013_3     2.938627
2013_4    -0.238157
2014_1     0.908262
2014_2     1.660880
2014_3     7.569055
2014_4     4.217043
2015_1     7.163520
2015_2     2.635607
2015_3    -1.677944
2015_4     6.145050
2016_1    -1.185653
2016_2     1.090347
2016_3     2.638919
2016_4     1.981869
2017_1     2.542528
2017_2    -3.245214
Name: pnl, dtype: float64

In [ ]:


In [ ]:


In [ ]:


In [66]:


In [67]:


In [68]:



Out[68]:
fund_id found_date type_id year month
0 JR000001 2014-09-30 Type_02 2014 2014_9
1 JR000002 2015-02-05 Type_02 2015 2015_2
2 JR000003 2015-02-05 Type_02 2015 2015_2
3 JR000004 2015-02-05 Type_02 2015 2015_2
4 JR000008 2014-02-26 Type_02 2014 2014_2
5 JR000009 2013-07-22 Type_02 2013 2013_7
6 JR000012 2014-09-16 Type_02 2014 2014_9
7 JR000013 2015-01-06 Type_02 2015 2015_1
8 JR000020 2015-05-19 Type_02 2015 2015_5
9 JR000033 2015-06-08 Type_02 2015 2015_6
10 JR000035 2011-03-22 Type_02 2011 2011_3
11 JR000036 2011-01-08 Type_02 2011 2011_1
12 JR000041 2011-09-23 Type_02 2011 2011_9
13 JR000042 2011-05-20 Type_02 2011 2011_5
14 JR000045 2012-03-01 Type_02 2012 2012_3
15 JR000047 2012-08-03 Type_02 2012 2012_8
16 JR000051 2012-03-29 Type_02 2012 2012_3
17 JR000054 2012-07-16 Type_02 2012 2012_7
18 JR000059 2013-02-06 Type_02 2013 2013_2
19 JR000063 2009-12-25 Type_02 2009 2009_12
20 JR000064 2010-01-28 Type_02 2010 2010_1
21 JR000066 2010-02-24 Type_02 2010 2010_2
22 JR000068 2010-07-16 Type_02 2010 2010_7
23 JR000070 2010-07-08 Type_02 2010 2010_7
24 JR000071 2010-06-12 Type_02 2010 2010_6
25 JR000072 2010-07-01 Type_02 2010 2010_7
26 JR000073 2010-09-21 Type_02 2010 2010_9
27 JR000074 2010-09-28 Type_02 2010 2010_9
28 JR000075 2010-10-27 Type_02 2010 2010_10
29 JR000076 2010-10-22 Type_02 2010 2010_10
... ... ... ... ... ...
81829 JR119859 2017-01-13 Type_11 2017 2017_1
81830 JR119861 2017-01-13 Type_11 2017 2017_1
81831 JR119862 2017-01-13 Type_11 2017 2017_1
81832 JR119865 2017-01-11 Type_11 2017 2017_1
81833 JR119866 2017-01-11 Type_11 2017 2017_1
81834 JR119867 2017-01-11 Type_11 2017 2017_1
81835 JR119868 2017-01-11 Type_11 2017 2017_1
81836 JR119869 2017-01-10 Type_11 2017 2017_1
81837 JR119877 2016-12-23 Type_11 2016 2016_12
81838 JR119879 2016-12-22 Type_11 2016 2016_12
81839 JR119880 2016-03-25 Type_11 2016 2016_3
81840 JR119881 2015-04-27 Type_11 2015 2015_4
81841 JR119802 2017-01-25 Type_11 2017 2017_1
81842 JR119803 2017-01-25 Type_11 2017 2017_1
81843 JR119804 2017-01-25 Type_11 2017 2017_1
81844 JR119833 2017-01-18 Type_11 2017 2017_1
81845 JR119836 2017-01-18 Type_11 2017 2017_1
81846 JR119844 2017-01-18 Type_11 2017 2017_1
81847 JR119848 2017-01-17 Type_11 2017 2017_1
81848 JR119851 2017-01-16 Type_11 2017 2017_1
81849 JR119860 2017-01-13 Type_11 2017 2017_1
81850 JR119863 2017-01-12 Type_11 2017 2017_1
81851 JR119864 2017-01-12 Type_11 2017 2017_1
81852 JR119870 2017-01-10 Type_11 2017 2017_1
81853 JR119871 2017-01-10 Type_11 2017 2017_1
81854 JR119873 2016-12-30 Type_11 2016 2016_12
81855 JR119875 2016-12-27 Type_11 2016 2016_12
81856 JR119872 2017-01-09 Type_11 2017 2017_1
81857 JR119874 2016-12-30 Type_11 2016 2016_12
81858 JR119876 2016-12-26 Type_11 2016 2016_12

81859 rows × 5 columns


In [ ]:


In [69]:


In [70]:



Out[70]:
fund_id
type_id year
Type_01 2007 1
2009 1
2010 4
2011 9
2012 35
2013 98
2014 501
2015 716
2016 493
Type_02 2001 1
2003 4
2004 3
2005 4
2006 15
2007 90
2008 101
2009 309
2010 585
2011 610
2012 667
2013 2200
2014 9160
2015 17520
2016 16118
Type_03 2007 1
2009 5
2010 8
2011 3
2012 21
2013 90
... ... ...
Type_09 2013 47
2014 264
2015 1317
2016 1590
Type_10 2006 2
2007 2
2008 1
2009 7
2010 27
2011 33
2012 17
2013 36
2014 179
2015 821
2016 456
Type_11 1995 1
2000 1
2002 3
2006 2
2007 5
2008 5
2009 7
2010 35
2011 57
2012 66
2013 121
2014 776
2015 6240
2016 3558
2017 78

108 rows × 1 columns


In [ ]: